import os
import re
from bs4 import BeautifulSoup


def replace_img_src(html_content):
    """Point ``<img>`` sources that start with ``img`` two directories up.

    Parses *html_content* with BeautifulSoup's ``html.parser`` and, for every
    ``<img>`` tag whose ``src`` attribute starts with ``img``, prepends a
    Windows-style "up two levels" prefix (two dots and a backslash, twice)
    to that ``src``. Each replacement is reported with ``print``.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The re-serialized document when at least one ``src`` was rewritten;
        otherwise *html_content* itself, unchanged, so that a caller comparing
        input and output only sees a difference when a path was replaced.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    prefix = '..\\..\\'
    changed = False

    for img in soup.find_all('img'):
        src = img.get('src')
        # Skip tags without a src, or whose src does not start with "img".
        if not src or not src.startswith('img'):
            continue

        # Prepend rather than str.replace(): replace() would rewrite every
        # "img" inside the path (e.g. "img/imgs/a.png"), not just the
        # leading one, and corrupt the result.
        new_src = prefix + src
        img['src'] = new_src
        changed = True
        print(f"已替换图片路径: {src} → {new_src}")

    # The parser's serialization can differ from the input text even when no
    # attribute was touched (quoting, entities), so only hand back the
    # re-serialized markup when something was really replaced.
    return str(soup) if changed else html_content


def process_html_files(root_dir):
    """Run ``replace_img_src`` over every ``.html`` file below *root_dir*.

    The tree is traversed with ``os.walk``. Each file whose name ends with
    ``.html`` (case-sensitive) is read as UTF-8 and passed to
    ``replace_img_src``; the file is overwritten (UTF-8) only when the
    returned text differs from what was read. A status line is printed for
    every file.

    Args:
        root_dir: Directory to walk recursively.

    Any exception raised while handling one file is caught and printed, and
    the walk continues with the next file.
    """
    for current_dir, _subdirs, names in os.walk(root_dir):
        for name in names:
            if not name.endswith('.html'):
                continue

            file_path = os.path.join(current_dir, name)
            try:
                with open(file_path, 'r', encoding='utf-8') as reader:
                    original = reader.read()

                updated = replace_img_src(original)

                # Nothing to write back: report and move on to the next file.
                if updated == original:
                    print(f"未修改(未找到匹配路径): {file_path}")
                    continue

                with open(file_path, 'w', encoding='utf-8') as writer:
                    writer.write(updated)
                print(f"已处理: {file_path}")
            except Exception as e:
                # Best-effort batch job: report the failure, keep going.
                print(f"处理文件 {file_path} 时出错: {str(e)}")


if __name__ == "__main__":
    # Root directory to process (hard-coded Windows path).
    target_dir = r"D:\aGetDataFrom111111\中储发展股份有限公司网站\05党群工作\党群工作-中储"
    process_html_files(target_dir)
    print("处理完成!")